#Alexander F. Gazmararian
#afg2@princeton.edu

# Load packages
library(tidyverse)
library(scales)
library(MASS)
library(lmtest)
library(sandwich)
library(modelsummary)
library(ggpubr)
library(margins)
library(mediation)
library(kableExtra)
library(broom)

# Set seed
set.seed(10)

# Load functions
source("code/fun/savefig.r")
source("code/fun/get_med_ci.r") # function to get 90 and 95% intervals from mediation analysis
source("code/fun/book_theme.r")
source("code/fun/fix_txt.r")

#specify baseline model
source("code/fun/model_spec.r")

#adjust baseline model for youth sample
f.base <- update(f.base, ~ . + pc1 + youth_block - CollegeDegree + HighestEd + mobility_f)

# Define treatment conditions
treat <- c("retrain_treat", "retrain_treat_pool", "loan_condition", "cost_condition")  #specify treatments for subsequent models

#load coefficient names
source("code/fun/coefnames4tables.r")
coefnames <- c("loan_condition"="Loan Treatment", coefnames, "support_train_scale"="Expect Support (Mediator)", "worthit"="Worth It (Mediator)")

#load data
g <- readRDS("data/NatYouthQual.rds")
g <- subset(g, cost_condition == 0) # Analyze baseline condition with cost at 0

# Function to recode model names for the plot
recode_retrain <- function(var) {
  require(dplyr)
  var1 <- dplyr::recode(
    factor(var),
    "retrain_treat" = "Conditional Loan x High Budget\nvs.\nUnconditional Loan x Low Budget",
    "retrain_treat_pool" = "Pooled",
    "loan_condition" = "Conditional Loan\nvs.\nUnconditional Loan",
    "cost_condition" = "High Budget\nvs.\nLow Budget"
  )
  return(factor(var1, ordered = TRUE, levels = c("Conditional Loan x High Budget\nvs.\nUnconditional Loan x Low Budget", "Pooled", "Conditional Loan\nvs.\nUnconditional Loan", "High Budget\nvs.\nLow Budget")))
}

#Estimate linear model ----

ols.out <- list()
ols.out[["Index"]] <- lm(update(f.base, train_index_med ~ . + loan_condition), g)
ols.out[["Enroll"]] <- lm(update(f.base, enroll_i ~ . + loan_condition), g)
ols.out[["Recommend"]] <- lm(update(f.base, recommend_i ~ . + loan_condition), g)
ols.out[["Find Jobs"]] <- lm(update(f.base, jobs_percent_z ~ . + loan_condition), g)
ols.out[["Support"]] <- lm(update(f.base, support_train_scale ~ loan_condition + .), g)
ols.out[["Index (M1)"]] <- lm(update(f.base, train_index_med ~ loan_condition + support_train_scale + .), g)
ols.out[["Worth It"]] <- lm(update(f.base, worthit ~ . + loan_condition), g)
ols.out[["Index (M2)"]] <- lm(update(f.base, train_index_med ~ . + worthit + loan_condition), g)

# Check to make sure results are the same with and without imputation of partisanship
###index outcome
lm(update(f.base, train_index_med ~ . + loan_condition), g) %>% summary() #without imputation
lm(update(f.base, train_index_med ~ . - PartySummary + PartySummary_impute + loan_condition), g) %>% summary() #with imputation
#enroll outcome
lm(update(f.base, enroll_i ~ . + loan_condition), g) %>% summary() #without imputation
lm(update(f.base, enroll_i ~ . - PartySummary + PartySummary_impute + loan_condition), g) %>% summary() #with imputation
#recommend outcome
lm(update(f.base, recommend_i ~ . + loan_condition), g) %>% summary() #without imputation
lm(update(f.base, recommend_i ~ . - PartySummary + PartySummary_impute + loan_condition), g) %>% summary() #with imputation
#find jobs outcome
lm(update(f.base, jobs_percent_z ~ . + loan_condition), g) %>% summary() #without imputation
lm(update(f.base, jobs_percent_z ~ . - PartySummary + PartySummary_impute + loan_condition), g) %>% summary() #with imputation
#support index for mediation analysis
lm(update(f.base, train_index_med ~ . + loan_condition + support_train_scale), g) %>% summary() #without imputation
lm(update(f.base, jobs_percent_z ~ . - PartySummary + PartySummary_impute + loan_condition + support_train_scale), g) %>% summary() #with imputation
#worth it mediation analysis
lm(update(f.base, train_index_med ~ . + loan_condition + worthit), g) %>% summary() #without imputation
lm(update(f.base, jobs_percent_z ~ . - PartySummary + PartySummary_impute + loan_condition + worthit), g) %>% summary() #with imputation

# Create online appendix table----
file <- "tables/ch7/ols_loancost.txt"
modelsummary(
  ols.out,
  vcov = "HC1",
  stars = c("*"=.1,"**"=.05,"***"=.01),
  coef_map = coefnames,
  coef_omit = "Income",
  gof_map = c("nobs", "adj.r.squared"),
  escape = FALSE,
  output = "latex"
) %>%
  add_header_above(c(" " = 1, "Main Outcomes" = 4, "Mediation" = 4)) %>%
  cat(., file = file)
fix_txt(file)

#Create Figure 7.3 ----
# Extract estimates
main.df <- lapply(ols.out, function(x) tidy(coeftest(x, vcov. = vcovHC(x, "HC1"))))
main.df <- map_df(main.df, ~as.data.frame(.x), .id = "outcome")
main.df <- subset(main.df, term == "loan_condition" & outcome %in% c("Index", "Support", "Worth It"))
# Construct confident interval
main.df$ub95 <- with(main.df, estimate + std.error * qnorm(0.975))
main.df$lb95 <- with(main.df, estimate + std.error * qnorm(0.025))
main.df$ub90 <- with(main.df, estimate + std.error * qnorm(0.95))
main.df$lb90 <- with(main.df, estimate + std.error * qnorm(0.05))
# Create plot
main.plot <-
  main.df %>%
  dplyr::mutate(
    outcome = dplyr::case_when(
      outcome == "Worth It" ~ "Mediator: Commitment worth the cost",
      outcome == "Support" ~ "Mediator: Government supportive of training",
      outcome == "Index" ~ "Local investment efficacy index"
    ),
    outcome = factor(
      outcome,
      ordered = TRUE,
      levels = c("Mediator: Commitment worth the cost", "Mediator: Government supportive of training", "Local investment efficacy index")
    )
  ) %>%
  ggplot(aes(y = outcome, x = estimate)) +
  geom_vline(xintercept = 0, lty = "dashed", color = "gray") +
  geom_errorbar(aes(xmin = lb95, xmax = ub95), width = 0, position = position_dodge(.5)) +
  geom_errorbar(aes(xmin = lb90, xmax = ub90), width = 0, position = position_dodge(.5), size = 1.75) +
  geom_point(size = 4) +
  scale_color_grey() +
  book_theme +
  labs(y = "", x = "Average treatment effect", color = "Treatment", shape = "Treatment",
       caption = "Notes: Thin and thick bars denote 95 and 90 percent confidence intervals") +
  theme(plot.margin = margin(r=5))
main.plot
savefig(main.plot, "7.3_fig_loancost", height = 1.5, filepath = "figures/")

# Reversibility mediation analysis----
med.main <- mediate(
  ols.out$Support,
  ols.out$`Index (M1)`,
  treat="loan_condition",
  mediator="support_train_scale",
  control.value=0,
  treat.value=1,
  dropobs=TRUE,
  sims=2000
  )
summary(med.main)

# Create plot
med.df <- get_med_ci(med.main)
med.plot <-
  med.df %>%
  ggplot(aes(x=outcome,y=estimate)) +
  geom_hline(yintercept=0,lty="dashed",color="grey") +
  geom_errorbar(aes(ymin=lb95,ymax=ub95),width=0) +
  geom_errorbar(aes(ymin=lb90,ymax=ub90),width=0,size=1.75) +
  geom_point(size=4) +
  book_theme +
  coord_flip() +
  labs(x="",y="Estimate") +
  theme(plot.margin = margin(r=7))
med.plot
savefig(med.plot, "si_med_support", height = 1.5, filepath = "figures/ch7/")

# Sequential ignorability analysis
sens.out <- medsens(med.main, rho.by = 0.1, effect.type = "indirect", sims = 2000)
summary(sens.out)

# Create figure
png("figures/ch7/si_med_support_seqignore.png", width = 5.5, height = 4, units = "in", res = 300, pointsize = 10)
plot(sens.out, sens.par = "R2")
dev.off()

# Worth it mediation analysis----
med.worth <- mediate(
  ols.out$`Worth It`,
  ols.out$`Index (M2)`,
  treat="loan_condition",
  mediator="worthit",
  control.value=0,
  treat.value=1,
  dropobs=TRUE,
  sims=2000
  )
summary(med.worth)

# Create plot
med.worth.df <- get_med_ci(med.worth)
med.worth.plot <-
  med.worth.df %>%
  ggplot(aes(x=outcome,y=estimate)) +
  geom_hline(yintercept=0,lty="dashed",color="grey") +
  geom_errorbar(aes(ymin=lb95,ymax=ub95),width=0) +
  geom_errorbar(aes(ymin=lb90,ymax=ub90),width=0,size=1.75) +
  geom_point(size=4) +
  book_theme +
  coord_flip() +
  labs(x="",y="Estimate") +
  theme(plot.margin = margin(r=7))
med.worth.plot
savefig(med.worth.plot, "si_med_worthit", height = 1.5, filepath = "figures/ch7/")

# Sequential ignorability analysis
sense.worth <- medsens(med.worth, rho.by = 0.1, effect.type = "indirect", sims = 2000)
summary(sense.worth)

# Create figure
png("figures/ch7/si_med_worthit_seqignore.png", width = 5.5, height = 4, units = "in", res = 300, pointsize = 10)
plot(sense.worth, sens.par = "R2")
dev.off()
